import time
import re
import pymongo
import redis
from get_cookies import getcookies
from concurrent.futures import ProcessPoolExecutor
from parse import parse
import random
import aiohttp
import asyncio


async def crawl(cookies_list, offset, semaphore):
    """Fetch one page of the CNVD flaw listing and return detail-page URLs.

    Posts the listing form to www.cnvd.org.cn with a randomly chosen cookie
    from the pool, then scrapes the relative ``/flaw/show/...`` links out of
    the HTML and absolutizes them.

    Parameters
    ----------
    cookies_list : list[str]
        Pool of cookie header strings; one is picked at random per request.
    offset : int
        Pagination offset posted to the endpoint (the page size ``max`` is 20).
    semaphore : asyncio.Semaphore
        Bounds the number of concurrent in-flight requests.

    Returns
    -------
    list[str]
        Up to 20 absolute vulnerability detail-page URLs.
    """
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'www.cnvd.org.cn',
        'Origin': 'https://www.cnvd.org.cn',
        'Referer': 'https://www.cnvd.org.cn/flaw/list.htm?flag=true',
        'Upgrade-Insecure-Requests': '1',
        # random.choice already yields a str; no format() wrapper needed.
        'Cookie': random.choice(cookies_list),
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
    }
    data = {
        # Placeholder text the site's search form submits by default
        # ("please enter an exact id") — the server expects it verbatim.
        'number': '请输入精确编号',
        'startDate': '',
        'endDate': '',
        'field': '',
        'flag': 'true',
        'order': '',
        'max': '20',
        'offset': str(offset)
    }
    async with semaphore:
        async with aiohttp.ClientSession() as session:
            async with session.post(
                url="https://www.cnvd.org.cn/flaw/list.htm?flag=true",
                headers=header,
                data=data,
            ) as r:
                res = await r.text(encoding='utf-8')
    # Extract the relative detail links and absolutize them; the listing
    # shows at most 20 rows, so cap the result accordingly.
    pattern = re.compile(r'/flaw/show/.+-\d+-\d+')
    url_list = ['https://www.cnvd.org.cn' + path for path in pattern.findall(res)][:20]
    # Previously this list was computed and silently dropped; return it so
    # callers (e.g. a parse/insert stage) can consume it.
    return url_list


def set_cookies():
    """Refill the Redis cookie queue by running ``getcookies`` in worker processes.

    Relies on the module-level globals ``conn`` (Redis connection) and
    ``Queue`` (Redis list key) being defined before it is called.

    NOTE(review): the original built ``[getcookies(conn, Queue) ...]``, which
    called getcookies serially in the parent process, and then passed that
    result list to ``p.map`` as the *function* with no iterables — so the pool
    never executed anything. Submitting the calls to the pool restores the
    intended parallelism.
    """
    with ProcessPoolExecutor(max_workers=4) as pool:
        futures = [pool.submit(getcookies, conn, Queue) for _ in range(10)]
        # Block until every worker finishes and surface any worker exception.
        for future in futures:
            future.result()
    # The ``with`` block already calls shutdown(wait=True); no explicit
    # shutdown is needed afterwards.

tasks = []  # kept for backward compatibility; nothing in this file uses it


async def run():
    """Crawl every listing page (offsets 0..10000, step 20) with bounded concurrency.

    Reads the module-level ``cookies_list`` global populated by the
    ``__main__`` block, and limits in-flight requests to 5.
    """
    sem = asyncio.Semaphore(5)
    coros = [crawl(cookies_list, off, sem) for off in range(0, 10001, 20)]
    # asyncio.wait() no longer accepts bare coroutines (deprecated in 3.8,
    # removed in 3.11); gather() works everywhere and preserves ordering.
    await asyncio.gather(*coros)
    # Grace period so any trailing work settles before the loop is closed.
    await asyncio.sleep(10)



if __name__ == '__main__':
    cookies_list = []
    # --- datastore configuration ---
    # Redis holds the pre-harvested cookie strings produced by get_cookies.
    conn = redis.Redis(host='106.14.144.54', port=6379, db=0,
                       password='Wzl!13433627612')
    Queue = 'msg_Cookies'
    # MongoDB target collection for parsed vulnerability records
    # (collection name: "information security vulnerability sharing platform").
    client = pymongo.MongoClient('mongodb://localhost:27017/')
    db = client['信息安全漏洞分享平台']
    cur = db['信息安全漏洞分享平台']
    # Pop exactly 10 cookies from the queue. rpop() returns None when the
    # queue is empty; the original bare ``except: pass`` swallowed the
    # resulting AttributeError and busy-spun forever — back off instead.
    while len(cookies_list) < 10:
        raw = conn.rpop(Queue)
        if raw is None:
            time.sleep(0.5)  # queue empty — wait for the cookie producer
            continue
        cookies_list.append(raw.decode('utf-8'))
    loop = asyncio.get_event_loop()
    loop.run_until_complete(run())
    loop.close()
